from lxml import etree
# Sample HTML document that the script parses and queries with XPath.
# It holds two <ul> lists -- "ul1", whose <li> items carry id and title
# attributes, and "ul3", whose <li> items carry only a class attribute --
# followed by a <p> whose text is spread over several lines.
html = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>aTitle发发发1</title>
</head>
<body>
    <ul id="ul1">
        <li id="item1" title="睡1">醒醒啦1</li>
        <li id="item2" title="睡2">醒醒啦2</li>
        <li id="item3" title="醒3">醒醒啦3</li>
        <li id="item4" title="睡4">醒醒啦4</li>
        <li id="item5" title="睡5">醒醒啦5</li>
    </ul>
    <ul id="ul3">
        <li class="item11">醒醒啦11</li>
        <li class="item22">醒醒啦22</li>
        <li class="item33">醒醒啦33</li>
        <li class="item44">醒醒啦44</li>
        <li class="item55">醒醒啦55</li>
    </ul>
    <p>
        hello
        i
        am
        iron
        man
    </p>
</body>
</html>
"""

# Parse the markup with lxml's HTML parser; `tree` is the element that all
# XPath queries below are evaluated against.
tree = etree.HTML(html)
# Show the parsed element followed by the attribute names it exposes.
print(f"{tree} {dir(tree)}")
"""
xpath 语法
/ 根目录
// 任意目录 *
./ 当前目录
.// 当前目录下的任意目录 *

//li[@id]
//li[@id="item5"]
//li[contains(@title, "醒")]
//li[position()>1]
//*[@id="container"]

//li[@id="item3"]/text()
//li[@id="item3"]/@title

"""

# Navigate the tree one step at a time: absolute path, then a child step,
# then a descendant step.  The "/html" result gets its own name so that
# `html` keeps referring to the source markup string instead of being
# rebound to a list of elements.
html_nodes = tree.xpath("/html")
print(html_nodes)
# "./body" is evaluated relative to the first (and only) <html> element.
body = html_nodes[0].xpath("./body")
print(body)
# ".//li" selects every <li> underneath <body>, at any depth.
lis = body[0].xpath(".//li")
print(lis)
# "//p" searches the whole document for <p> elements.
p = tree.xpath("//p")
print(p)

# Predicate and value-extraction queries, each evaluated against the whole
# document.  The queries are run first and printed together afterwards.
lis = tree.xpath("//li[@id]")  # <li> elements that have an id attribute
li5 = tree.xpath('//li[@id="item5"]')  # the <li> whose id equals "item5"
lis2 = tree.xpath("//li[contains(@title, '醒')]")  # title contains '醒'
# Every <li> of the "ul3" list except the first one.
li_gt_first = tree.xpath("//ul[@id='ul3']/li[position()>1]")
li3_text = tree.xpath('//li[@id="item3"]/text()')  # text of item3
li3_title = tree.xpath('//li[@id="item3"]/@title')  # title value of item3

# One result per line, in the same order the queries were written.
print(lis, li5, lis2, li_gt_first, li3_text, li3_title, sep="\n")
